## This script produces all figures of the paper
## "Just don't call it a tax! Framing in an experiment on voting and redistribution"
##
## This script reads the data from observations.csv and prodcues all the figures in the paper. 
## The table of the paper is produced with STATA. Find the do-file separately. 
##
## The data is from experiments conducted with the software ztree. 
## The file minimumdecision_nocalc.ztt was used to conduct sessions which data is used.
## The file minimumdecision_questionaire.ztq was used to collect the questionaire data used.
## Both files can serve as data documentation also for the unused variables in the dataset. 
## Note: The ztree files are not made for didactical purposes and are provided "as is" 
##       for the sake of reproducibility. 
## Each case (row) in the data frame obtained from observations.csv corresponds to the activities 
##   of one subject in a particular period of a particular session. 
## Note: Many variables are not used for the analysis, but are there only in because they were used 
##   in the treatment.
## Below we produce data frame which includes only the needed data.
## This smaller dataset is documented in more detail below. 
##
## The dataset is a merged dataset from the "subjects" tables of sessions conducted at the dates 
## "131219_0946" "131219_1236" "140130_1230" "140130_1449" "140717_0908" "140717_1258" "140731_1029" "140731_1254"
## The values of the opinions tables (in particular the 10 proposals and the times when they were confirmed) 
## have been appended to the subjects tables.
## The questionaire data subjects entered after the experiments were also appended to the subjects tables.
## Due to technical crashes of two sessions, some data needed to be restored from gsf files. 
## Due to these crashes for some observations there is no proposal data (because it has not been extracted from gsf files)
## and no questionaire data (because the questionaire has not been conducted).
## For the paper only the sessions 140717_0908, 140717_1258, 140731_1029, 140731_1254 produced with minimumdecision.ztt are used. 
## The other sessions included a calculator. 
## The analysis and documentation of the impact of the calculator is not subject to the paper.
## The sessions without calculator can stand as an independent experiment because of the "between subject" design. 

## PRELIMINARIES 
library(e1071); # for skewness
library(plotrix)
# Enter the directory where you store the data
# Figures will also be written to that direcory.
setwd("/home/janlo/Dropbox/Minimum Income/Data/dataverse")
# library(readstata13)
# obss = read.dta13("observations.dta");

## TABLE OF DISTRIBUTIONS
n = 5; su = 1700*n; subinds=paste("S",1:5,sep="");
distr = function(n,su,di,m,s) {
  probs = seq(1/(n+1),(n)/(n+1),length.out=n); # marks n inner probabilities
  if (di=="norm") {quants = qnorm(probs,mean=m,sd=s);}
  if (di=="lnorm") {quants = qlnorm(probs,meanlog=m,sdlog=s);}
  quants = rev(round(quants/sum(quants)*su,digits=-2));
  return(data.frame(S1=quants[1],S2=quants[2],S3=quants[3],S4=quants[4],S5=quants[5],
                    distribution=di,stringsAsFactors = FALSE))
}
m=1700; s =  900; di="norm";Di=distr(n,su,di,m,s);
m=1700; s = 1700; di="norm";Di[2,]=distr(n,su,di,m,s);
m=log(1700); s = log(2); di="lnorm";Di[3,]=distr(n,su,di,m,s);
m=log(1700); s = log(1.6); di="lnorm";Di[4,]=distr(n,su,di,m,s);Di[4,subinds]=rev(3400-Di[4,subinds]);Di[4,"distribution"]="revlnorm";
m=log(1700); s = log(2.35); di="lnorm";Di[5,]=distr(n,su,di,m,s);Di[5,subinds]=rev(3400-Di[5,subinds]);Di[5,"distribution"]="revlnorm";
Di[6,subinds]=c(2600,1800,1700,1600,800);Di[6,"distribution"]="plateau";
Di$sum=apply(Di[,subinds],1,"sum");Di$sd=round(apply(Di[,subinds],1,"sd"),digits=0);
Di$skew=round(apply(Di[,subinds],1,"skewness"),digits=2);Di$kurt=round(apply(Di[,subinds],1,"kurtosis"),digits=2);
Di$taxRatGroupDec=sign(apply(Di[,paste("S",1:5,sep="")]<1700,1,sum)-apply(Di[,paste("S",1:5,sep="")]>1700,1,sum))*0.5+0.5;
dists = Di;

## CREATE DATASET (without calculator sessions and some variable renaming)
obs = read.csv("observations.csv");
# Replace GroupDecision with NA when DecisionReached==0
obs[obs$DecisionReached==0,"GroupDecision"]=NA;
# Renaming and further variables
names(obs)=sub("Proposal","P",names(obs));#nums = sub("Proposal","P",nums);
names(obs)=sub("TimeOKCommunicationAndDecisionNewOpinionMake","T",names(obs));
nums = c("ideal", paste("P",1:10,sep=""),"Decision","GroupDecision");
vars = c("SessionID","Group","Subject","Period","BruttoIncome",nums,"DecisionReached",
         paste("T",1:10,sep=""),"frame","calculator","distribution","randomOrder");
# Extract needed data
obs=obs[obs$calculator=="nocalc",vars];
obs$taxRatPref=100*(0.5-sign(obs$BruttoIncome-1700)*0.5); # Rationally prefered tax rate
# min2tax and tax2min
obs[,paste("min",nums,sep="")]=obs[,nums];
obs[,paste("tax",nums,sep="")]=obs[,nums];
for (i in which(obs$frame=="tax")) {obs[i,paste("min",nums,sep="")] = tax2min(as.vector(t(dists[obs[i,"distribution"],c("S1","S2","S3","S4","S5")])),obs[i,nums]/100) ;}
for (i in which(obs$frame=="min")) {obs[i,paste("tax",nums,sep="")] = min2tax(as.vector(t(dists[obs[i,"distribution"],c("S1","S2","S3","S4","S5")])),obs[i,nums])*100 ;}
# groups dataset
groups = aggregate(obs[,c("ideal","Decision","GroupDecision",
                          "taxideal","taxDecision","taxGroupDecision",
                          "taxRatPref")],
                   obs[,c("SessionID","Period","Group","distribution","frame","DecisionReached")],"median") 

## VARIBALE DOCUMENTATION (columns), cf. Description of Experiment and Instructions
# SessionID: Identifier of Experimental Session (ztree-Standard); Values: 140717_0908, 140717_1258, 140731_1029, 140731_1254
# Group: Group-Identifier within Session and Period; Values: 1,2,3,4
# Subject: Subject-Identifier within Session; Values: 1,...,20
# Period: Period-Identifier within Session; Values: 1,...,6 
# distribution: Identifier which distribution was used (the same for all subjects in one Period in one Session); Values: 1,...,6
# frame: Identifier of experimental frame (the same for all in one Session); Values: min, tax
# BruttoIncome: Initial endowment of subject in that Period; Values: cf. Distributions table, was randomly assigned
# ideal: value subject entered when asked fom its ideal minimal income (MIN frame)/ tax rate (TAX frame); 
#        Values: 100,...,1700 (MIN frame)/ 0,...,100 (TAX frame)
# Decision: decision subject entered after the communication phase, from the entries the collective decision (majority) was extracted
#        Values: 100,...,1700 (MIN frame)/ 0,...,100 (TAX frame)
# GroupDecision: Group decision extracted from the decisions in the group of the subject
#        Values: NA when less than three subjects entered the same decision, 100,...,1700 (MIN frame)/ 0,...,100 (TAX frame)
# DecisionReached: marks if the group reached a decision; Values: 0 (if group failed to reach a decision, then GroupDecision=NA), 1
# taxideal, taxDecision, taxGroupDecision: Values of ideal, Decision, GroupDecision computed transfered to tax rates (cf. Sec. 2 of the paper)
# taxRatPref: Rationally egoistically prefered tax rate subject with respect to the distribution in the current group 
#        Values: 0, 0.5 (=indifferent), 100

## OUTPUT

# Basic numbers (compare to numbers in manuscript text)
# obs ideal
aggregate(obs$taxideal,list(frame=obs$frame),"mean")
table(obs[obs$frame=="min","minideal"])
tab=table(obs[obs$frame=="tax","taxideal"])
sum(tab[as.numeric(names(tab))<=50])
# group decision
aggregate(groups$taxGroupDecision,list(frame=groups$frame),"mean",na.rm=TRUE)
table(groups[groups$frame=="min","minGroupDecision"])
tab=table(groups[groups$frame=="tax","taxGroupDecision"])
sum(tab[as.numeric(names(tab))<=50])
aggregate(groups$DecisionReached,list(frame=groups$frame),"sum")
# rational prediction
rat=table(obs$taxRatPref)
aggregate(obs[,c("taxRatPref","taxideal","taxDecision")],list(distribution=obs$distribution,frame=obs$frame),"mean")
sum(rat*as.numeric(names(rat)))/sum(rat)
rat=table(groups$taxRatPref)
sum(rat*as.numeric(names(rat)))/sum(rat)


# Fig 1. Distributions
pdf("fig1_dists.pdf",width=7,height=3);
par(mar=c(2.2,3,2,9.7)+0.1)
d=as.data.frame(t(dists[,paste("S",1:5,sep="")]));
cols=sapply(as.list(d), function(x) cut(x,c(0,1699,1701,10000),labels=c(rgb(.2,.2,.2),rgb(.5,.5,.5),rgb(0.8,0.8,0.8))))
barplot(t(dists[,paste("S",1:5,sep="")]),beside=TRUE,ylim=c(0,3500),main="Initial endowments in groups",yaxt="n",
        col=cols,xlab="Distribution")
axis(2,at=c(0,100,800,1700,3300),labels=c(NA,100,800,1700,3300),las=2)
lines(c(0.5,48.5),c(1700,1700),col="red",lwd=2)
legend("right",c("ideal point 0%","indifferent","ideal point 100%"),inset=c(-0.4,0),xpd=TRUE,
       pch=c(15),col=c(rgb(0.8,0.8,0.8),rgb(.5,.5,.5),rgb(.2,.2,.2)),bty="n",title="Egoistic preferences")
dev.off();

# Fig. 2 Histograms
for (gr in c("groups","obs")) {
  if (gr=="groups") {go = groups;} else {go=obs;}
  tab = as.vector(table(go[go$frame=="tax","taxRatPref"])); # Equal for both frames!
  pdf(file=paste('fig2_hist_',gr,'_rational.pdf',sep=''),height=4,width=7)
  par(mar=c(5,4,4,17)+0.1)
  plot.new();plot.window(xlim=c(0,100),ylim=c(0,switch(gr,groups=30,140)));
  axis(1);axis(2);
  title(xlab=paste("rational choice tax rate",switch(gr,groups="group median",""),"(%)")) ;
  mtext(switch(gr,groups="For comparison: \n Median egoistic \n preference","For comparison: \n Egoistic preferences"),side=3,
        line=switch(gr,groups=-3,-1.5),cex=1.5,font=2)
  barplot(rbind(rep(tab[2]/10,10),c(tab[1],rep(0,9)),c(rep(0,9),tab[3])),
          col=c(rgb(.5,.5,.5),rgb(.8,.8,.8),rgb(0.2,0.2,0.2)), width=10,space=0,add=TRUE); 
  legend("right",c("ideal point 0%","indifferent","ideal point 100%"),
         title=switch(gr,groups="Median egoisitic preference","Egoistic preferences"),
         inset=c(-1.15,0),xpd=TRUE,
         pch=c(15),col=c(rgb(0.8,0.8,0.8),rgb(.5,.5,.5),rgb(.2,.2,.2)),bty="n",cex=1.5)
  dev.off();
  for (v in c("taxideal","taxGroupDecision")) {
    for (f in c("tax","min")) {
      pdf(file=paste('fig2_hist_',gr,'_',f,'_',v,'.pdf',sep=''),height=4,width=4);
      h0=hist(go[go$frame==f & go$taxRatPref==0,v],breaks=seq(0,100,10),plot=FALSE);
      hI=hist(go[go$frame==f & go$taxRatPref==50,v],breaks=seq(0,100,10),plot=FALSE);
      h1=hist(go[go$frame==f & go$taxRatPref==100,v],breaks=seq(0,100,10),plot=FALSE);
      maxy=max(h0$counts+hI$counts+h1$counts);
      plot.new();
      plot.window(xlim=c(0,100),ylim=c(0,switch(gr,groups=32,140)));
      axis(1);axis(2);title(#main=paste(toupper(f)," frame (",nrow(go)/2,switch(gr,groups=" groups)"," observations)"),sep=""),
        xlab=paste("tax rate",tolower(sub("Group","group ",sub("tax","",v))),"(%)" ));
      mtext(paste(toupper(f)," frame",sep=""),side=3,line=-1,cex=1.5,font=2)
      barplot(rbind(h0$counts,hI$counts,h1$counts),col=c(rgb(.8,.8,.8),rgb(.5,.5,.5),rgb(0.2,0.2,0.2)), width=10,space=0,add=TRUE); 
      dev.off();
    }
  }
}


# Fig. 3 Means over distributions with errorbars ALL
for (gr in c("groups","obs")) {
  for (calc in c("both")) {
    if (gr=="groups") {go = groups;} else {go=obs;}
    MeanCI=aggregate(go[,c("taxideal","taxDecision","taxRatPref")],list(distribution=go[,"distribution"],frame=go[,"frame"]),"mean");
    MeanCI[,paste(c("taxideal","taxDecision"),"CI",sep="")]=aggregate(go[,c("taxideal","taxDecision")],
                                                                      list(distribution=go[,"distribution"],frame=go[,"frame"]),
                                                                      "std.error")[,c("taxideal","taxDecision")]
    pdf(file=paste('fig3_means_',gr,'_all_',calc,'.pdf',sep=''),height=5,width=4.5);
    plot(-1,-1,
         xlab="Endowment distribution",ylab="Average tax rate (%)",xlim=c(0.7,6.3),ylim=c(0,100));
    for (i in 1:6) {lines(c(i-0.17,i+0.07),MeanCI[MeanCI$distribution==i & MeanCI$frame=="min",c("taxideal","taxDecision")],col="lightblue");
      lines(c(i-0.07,i+0.17),MeanCI[MeanCI$distribution==i & MeanCI$frame=="tax",c("taxideal","taxDecision")],col="gray");}
    v="taxideal";
    plotCI(MeanCI[MeanCI$frame=="min","distribution"]-0.17,MeanCI[MeanCI$frame=="min",v],1.96*MeanCI[MeanCI$frame=="min",paste(v,"CI",sep="")],
           col="blue",pch=20,add=TRUE)
    plotCI(MeanCI[MeanCI$frame=="tax","distribution"]-0.07,MeanCI[MeanCI$frame=="tax",v],1.96*MeanCI[MeanCI$frame=="tax",paste(v,"CI",sep="")],
           pch=20,add=TRUE);
    v="taxDecision";
    plotCI(MeanCI[MeanCI$frame=="min","distribution"]+0.07,MeanCI[MeanCI$frame=="min",v],1.96*MeanCI[MeanCI$frame=="min",paste(v,"CI",sep="")],col="blue",pch=15,add=TRUE)
    plotCI(MeanCI[MeanCI$frame=="tax","distribution"]+0.17,MeanCI[MeanCI$frame=="tax",v],1.96*MeanCI[MeanCI$frame=="tax",paste(v,"CI",sep="")],pch=15,add=TRUE);  
    legend(0.8,22,c("MIN frame","TAX frame"),col=c("blue","black"),lwd=c(1,1),bty="n")
    legend(3.5,22,c("Ideal","Decision"),col=c("darkblue","darkblue"),pch=c(20,15),bty="n")
    dev.off();
  }
}